#################################################################################
### Title: Are People Willing to Trade Away Democracy for Desirable Outcomes? ###
### Authors: Jonathan A. Chu, Scott Williamson, Eddy S. F. Yeung              ###
### Content: Data cleaning for the American sample                            ###
### Date: September 20, 2025                                                  ###
#################################################################################

### Set-up ----
## Clean the working environment and set the working directory
# NOTE(review): rm(list = ls()) only removes objects from the current
# environment; it does not detach packages or reset options, so it is not a
# substitute for running the script in a fresh R session.
rm(list = ls())
# All file paths below ("raw_data_US.csv", "df_US.csv") are relative to this
# directory.
setwd("~/Desktop/democracy_tradeoff/replication") # change to your own working directory

## Load the required packages
library(tidyverse)
library(estimatr)
library(cregg)
library(expss)
library(cjdata)

## Read the dataset
# Raw survey export for the US sample, read relative to the working directory.
# NOTE(review): read_Qualtrics() is presumably the cjdata reader for Qualtrics
# CSV exports -- confirm against the package documentation.
df <- read_Qualtrics("raw_data_US.csv")

## Recode variable names for conjoint data
# Forced choice DV: for each of the three tasks, keep the first non-missing
# response among the top/mid/btm versions of the item (checked in that order).
df <- df %>%
  mutate(
    Q1.1 = coalesce(
      `1_trade_top_force`, `1_trade_mid_force`, `1_trade_btm_force`
    ),
    Q1.4 = coalesce(
      `2_trade_top_force`, `2_trade_mid_force`, `2_trade_btm_force`
    ),
    Q1.7 = coalesce(
      `3_trade_top_force`, `3_trade_mid_force`, `3_trade_btm_force`
    )
  )

# Inspect the distribution of each combined outcome
table(df$Q1.1)
table(df$Q1.4)
table(df$Q1.7)

# Rating for Country A: first non-missing rating among the top/mid/btm
# versions of the item, for each of the three tasks.
df <- df %>%
  mutate(
    Q1.2 = coalesce(`1_trade_top_a`, `1_trade_mid_a`, `1_trade_btm_a`),
    Q1.5 = coalesce(`2_trade_top_a`, `2_trade_mid_a`, `2_trade_btm_a`),
    Q1.8 = coalesce(`3_trade_top_a`, `3_trade_mid_a`, `3_trade_btm_a`)
  )

# Inspect the distribution of each combined rating
table(df$Q1.2)
table(df$Q1.5)
table(df$Q1.8)

# Rating for Country B: first non-missing rating among the top/mid/btm
# versions of the item, for each of the three tasks.
df <- df %>%
  mutate(
    Q1.3 = coalesce(`1_trade_top_b`, `1_trade_mid_b`, `1_trade_btm_b`),
    Q1.6 = coalesce(`2_trade_top_b`, `2_trade_mid_b`, `2_trade_btm_b`),
    Q1.9 = coalesce(`3_trade_top_b`, `3_trade_mid_b`, `3_trade_btm_b`)
  )

# Inspect the distribution of each combined rating
table(df$Q1.3)
table(df$Q1.6)
table(df$Q1.9)

## Add attribute names
# Column `F-<task>-<k>` holds the name of the k-th attribute; the name is the
# same constant for every respondent and for each of the three tasks.
attribute_labels <- c(
  "Leader Selection",
  "Civil Liberties",
  "Leader Constraints",
  "Corruption in Politics",
  "National Economy",
  "Respondent Wealth",
  "Public Safety",
  "Health Care",
  "Minority Treatment",
  "Respondent Identity"
)
for (attr_idx in seq_along(attribute_labels)) {
  # Tasks are filled in the order 3, 2, 1 so that any newly created columns
  # are appended in the same order as a right-to-left chained assignment.
  for (task_idx in 3:1) {
    df[[paste0("F-", task_idx, "-", attr_idx)]] <- attribute_labels[[attr_idx]]
  }
}

## Recode variables of respondent characteristics
# Age (1 = younger than 40; 0 = 40 or older)
# The second condition uses >= so that respondents aged exactly 40 are coded
# as "Older" rather than falling through both conditions and becoming NA.
# Missing ages remain NA.
df <- df %>% mutate(age_bin = case_when(
  age < 40 ~ 1,
  age >= 40 ~ 0
))
# Level 0 is labelled "Older" and level 1 "Younger Than 40"
df$age_bin <- factor(df$age_bin, 0:1, c("Older", "Younger Than 40"))
table(df$age_bin)

# Minority status (1 = minority; 0 = otherwise)
# "No" and "Not sure" are both coded as non-minority; any other value
# (including missing) becomes NA.
df <- df %>%
  mutate(
    minority_bin = factor(
      case_when(
        minority == "Yes" ~ 1,
        minority %in% c("No", "Not sure") ~ 0
      ),
      levels = 0:1,
      labels = c("Non-Minority", "Minority")
    )
  )
table(df$minority_bin)

# Gender (1 = female; 0 = male)
# The original item is kept under the name `gender5`; responses other than
# "Female"/"Male" become NA in the binary indicator.
df <- df %>%
  rename(gender5 = gender) %>%
  mutate(
    gender_bin = factor(
      case_when(
        gender5 == "Female" ~ 1,
        gender5 == "Male" ~ 0
      ),
      levels = 0:1,
      labels = c("Male", "Female")
    )
  )
table(df$gender_bin)

# Self-reported political ideology (1 = right; 0 = left; NA = neither)
# Response labels are whitespace-normalised with str_squish() before matching,
# so a label exported with irregular spacing (e.g. "Somewhat      Right") and
# its normally spaced form ("Somewhat Right") are both recognised. Responses
# that match neither set (and missing values) stay NA.
df <- df %>% mutate(ideo_bin = case_when(
  str_squish(political) %in%
    c("Strongly Left", "Moderately Left", "Somewhat Left") ~ 0,
  str_squish(political) %in%
    c("Strongly Right", "Moderately Right", "Somewhat Right") ~ 1
))
df$ideo_bin <- factor(df$ideo_bin, 0:1, c("Leftwing", "Rightwing"))
table(df$ideo_bin)

# Education (1 = college; 0 = no college)
# "College" means a bachelor's or advanced degree; an associate's degree or
# less is coded as "No College". Unlisted or missing responses become NA.
df <- df %>%
  mutate(
    edu_bin = factor(
      case_when(
        edu %in% c(
          "Less than high school",
          "High school or equivalent",
          "Some college, no degree",
          "Associate's Degree (AA)"
        ) ~ 0,
        edu %in% c(
          "Bachelor's Degree (BA, BS, BBA)",
          "Advanced degree (MA, MS, MBA, PhD, JD, MD, etc.)"
        ) ~ 1
      ),
      levels = 0:1,
      labels = c("No College", "College")
    )
  )
table(df$edu_bin)

# Socioeconomic status (1 = high; 0 = low)
# Ladder rungs 1-5 are coded as low SES and rungs 6-10 as high SES; any other
# value (including missing) becomes NA.
df <- df %>%
  mutate(
    SES = factor(
      case_when(
        ladder %in% c("1 (Lowest)", "2", "3", "4", "5") ~ 0,
        ladder %in% c("10 (Highest)", "6", "7", "8", "9") ~ 1
      ),
      levels = 0:1,
      labels = c("Low SES", "High SES")
    )
  )
table(df$SES)

## Reshape the conjoint data for analysis
# The respondent ID column is passed by name as a string. "ResponseId" is the
# same column that is renamed to `id` before the covariate merge below; the
# previous bare symbol `ResponseID` referred to no object and differed in case
# from the actual column name.
# The outcomes are the forced-choice items for the three tasks:
# Q1.1, Q1.4 and Q1.7.
df_cj <- reshape_conjoint(df, "ResponseId", paste0("Q1.", seq(from = 1, to = 9, by = 3)))

## Reorder the factors
# Each conjoint attribute is converted to a factor whose levels follow the
# order listed below; where `labels` is given, the survey wording is replaced
# by a shorter label. Values not listed in `levels` become NA.
df_cj <- df_cj %>%
  mutate(
    # Leader selection
    `Leader Selection` = factor(
      `Leader Selection`,
      levels = c(
        "Military coups",
        "Hereditary succession",
        "A small group of unelected elites",
        "Unfair elections",
        "Free and fair elections"
      ),
      labels = c(
        "Military coup",
        "Hereditary succession",
        "Unelected elites",
        "Unfair elections",
        "Free and fair elections"
      )
    ),
    # Civil liberties
    `Civil Liberties` = factor(
      `Civil Liberties`,
      levels = c(
        "Cannot express themselves and organize without severe government repression",
        "Can express themselves and organize but face some risk of government repression",
        "Can express themselves and organize freely"
      ),
      labels = c("Repressed", "Partially free", "Free")
    ),
    # Leader constraints
    `Leader Constraints` = factor(
      `Leader Constraints`,
      levels = c(
        "Can almost always bypass the legislature and courts' authority",
        "Can sometimes bypass the legislature and courts' authority",
        "Must respect the legislature and courts' authority"
      ),
      labels = c("Unconstrained", "Partially constrained", "Constrained")
    ),
    # Corruption in politics
    `Corruption in Politics` = factor(
      `Corruption in Politics`,
      levels = c("All the time", "Sometimes", "Very rarely"),
      labels = c("Prevalent", "Occasional", "Rare")
    ),
    # National economy
    `National Economy` = factor(
      `National Economy`,
      levels = c("Low-income", "Medium-income", "High-income"),
      labels = c("Low income", "Middle income", "High income")
    ),
    # Respondent wealth
    `Respondent Wealth` = factor(
      `Respondent Wealth`,
      levels = c("Poorer than most", "About average", "Wealthier than most"),
      labels = c("Poor", "Average", "Wealthy")
    ),
    # Health care
    `Health Care` = factor(
      `Health Care`,
      levels = c(
        "Accessible only to people with money or connections",
        "Accessible to most people"
      ),
      labels = c("For the privileged", "Mostly accessible")
    ),
    # Public safety (levels keep their original wording)
    `Public Safety` = factor(
      `Public Safety`,
      levels = c(
        "Very dangerous",
        "Somewhat dangerous",
        "Somewhat safe",
        "Very safe"
      )
    ),
    # Minority treatment
    `Minority Treatment` = factor(
      `Minority Treatment`,
      levels = c(
        "Treated unfairly by most people",
        "Treated fairly by some people but unfairly by others",
        "Treated fairly by most people"
      ),
      labels = c("Mostly unfair", "Sometimes unfair", "Fairly treated")
    ),
    # Respondent identity
    `Respondent Identity` = factor(
      `Respondent Identity`,
      levels = c(
        "Put you in the smallest minority group",
        "Put you in the second largest group",
        "Put you in the largest majority group"
      ),
      labels = c("Minority", "Second largest", "Majority")
    )
  )

### Save the cleaned dataset ----
## Incorporate covariates
# Rename the respondent identifier to `id` so it can serve as the merge key,
# then attach the respondent-level columns to the conjoint data.
df <- rename(df, id = ResponseId)
df_cj <- merge(x = df_cj, y = df, by = "id")

## Indicate the country
df_cj[["country"]] <- "US"

## Export the dataset
write.csv(df_cj, file = "df_US.csv", row.names = FALSE)
